tmp <- stack(sort(table(F[[i]]), decreasing=T) / 500)[1:min(length(table(F[[i]])), 25),]
colnames(tmp) <- c('freq', 'feature')
# plot
pdf(paste0('~/Downloads/', lNames[i], '_barplot.pdf'))
ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none')
dev.off()
}
i
tmp
tmp <- stack(sort(table(F[[i]])) / 500)
head(tmp)
tail(tmp)
tmp <- stack(sort(table(F[[i]])) / 500)
colnames(tmp) <- c('freq', 'feature')
head(tmp)
tmp <- stack(sort(table(F[[i]])) / 500)
colnames(tmp) <- c('freq', 'feature')
if (nrow(tmp) > 25) {
tmp <- tmp[(nrow(tmp)-24):nrow(tmp),]
}
tmp
ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none')
ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none') + ylim=c(0,1)
ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none') + ylim(0,1)
ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none') + ylim(0,1) +
geom_hline(yintercept=0.5)
ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none') + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue', size=2)
ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none') + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue') + theme_minimal()
lNames <- c('BTMs', 'BGCs', 'Caprion', 'Plasma_MetaB', 'Stool_MetaB')
for (i in 1:5) {
tmp <- stack(sort(table(F[[i]])) / 500)
colnames(tmp) <- c('freq', 'feature')
if (nrow(tmp) > 25) {
tmp <- tmp[(nrow(tmp)-24):nrow(tmp),]
}
# plot
pdf(paste0('~/Downloads/', lNames[i], '_barplot.pdf'))
ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none') + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue') + theme_minimal()
dev.off()
}
lNames <- c('BTMs', 'BGCs', 'Caprion', 'Plasma_MetaB', 'Stool_MetaB')
for (i in 1:5) {
tmp <- stack(sort(table(F[[i]])) / 500)
colnames(tmp) <- c('freq', 'feature')
if (nrow(tmp) > 25) {
tmp <- tmp[(nrow(tmp)-24):nrow(tmp),]
}
# plot
pdf(paste0('~/Downloads/', lNames[i], '_barplot.pdf'))
print(ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none') + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue') + theme_minimal())
dev.off()
}
lNames <- c('BTMs', 'BGCs', 'Caprion', 'Plasma_MetaB', 'Stool_MetaB')
for (i in 1:5) {
tmp <- stack(sort(table(F[[i]])) / 500)
colnames(tmp) <- c('freq', 'feature')
if (nrow(tmp) > 25) {
tmp <- tmp[(nrow(tmp)-24):nrow(tmp),]
}
# plot
pdf(paste0('~/Downloads/', lNames[i], '_barplot.pdf'))
print(ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme(legend.position='none') + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue'))
dev.off()
}
print(ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity') +
coord_flip() + theme_minimal(legend.position='none') + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue'))
?theme_minimal
print(ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity', show.legend=F) +
coord_flip() + theme_minimal() + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue'))
?geom_bar
for (i in 1:5) {
tmp <- stack(sort(table(F[[i]])) / 500)
colnames(tmp) <- c('freq', 'feature')
if (nrow(tmp) > 25) {
tmp <- tmp[(nrow(tmp)-24):nrow(tmp),]
}
# plot
pdf(paste0('~/Downloads/', lNames[i], '_barplot.pdf'))
print(ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity', show.legend=FALSE) +
coord_flip() + theme_minimal() + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue'))
dev.off()
}
RF=list()
for(i in seq(1,nviews)){
RF[[i]]<-names(which(table(F[[i]])>0.5*500))
print(RF[[i]])
}
RF=list()
for(i in seq(1,nviews)){
RF[[i]]<-names(which(table(F[[i]])>0.4*500))
print(RF[[i]])
}
lNames <- c('BTMs', 'BGCs', 'Caprion', 'Plasma_MetaB', 'Stool_MetaB')
for (i in 1:5) {
tmp <- stack(sort(table(F[[i]])) / 500)
colnames(tmp) <- c('freq', 'feature')
if (nrow(tmp) > 25) {
tmp <- tmp[(nrow(tmp)-24):nrow(tmp),]
}
# plot
pdf(paste0('~/Downloads/', lNames[i], '_barplot.pdf'))
print(ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity', show.legend=FALSE) +
coord_flip() + theme_minimal() + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue')) +
geom_hline(yintercept=0.4, linetype='dashed', color='black')
dev.off()
}
for (i in 1:5) {
tmp <- stack(sort(table(F[[i]])) / 500)
colnames(tmp) <- c('freq', 'feature')
if (nrow(tmp) > 25) {
tmp <- tmp[(nrow(tmp)-24):nrow(tmp),]
}
# plot
pdf(paste0('~/Downloads/', lNames[i], '_barplot.pdf'))
print(ggplot(tmp, aes(x=feature, y=freq, fill='red')) + geom_bar(stat='identity', show.legend=FALSE) +
coord_flip() + theme_minimal() + ylim(0,1) +
geom_hline(yintercept=0.5, linetype='dashed', color='blue') +
geom_hline(yintercept=0.4, linetype='dashed', color='black'))
dev.off()
}
max(table(F[[4]]))
max(table(F[[5]]))
a1 <- c(3,5)
a2 <- c(2,5,7)
sm <- 14
for (i in a1) {
for (j in a2) {
if (i != j) {
cat(paste(i, j, (sm - i - j), '\n'))
}
}
}
a1 <- c(1,4)
a2 <- c(2,9)
sm <- 13
for (i in a1) {
for (j in a2) {
if (i != j) {
cat(paste(i, j, (sm - i - j), '\n'))
}
}
}
a1 <- c(6,7)
a2 <- c(8,9)
sm <- 19
for (i in a1) {
for (j in a2) {
if (i != j) {
cat(paste(i, j, (sm - i - j), '\n'))
}
}
}
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("mixOmics")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("mixOmics")
library(mixOmics)
head(breast.TCGA)
data("breast.TCGA")
str(breast.TCGA)
data_fake <- list(X=list(layer1=matrix(rnorm(50*100, mean=5, sd=7), nrow=50, ncol=100),
layer2=matrix(rnorm(50*100, mean=3, sd=3), nrow=50, ncol=100),
layer3=matrix(rnorm(50*150, mean=10, sd=10), nrow=50, ncol=150)),
Y=as.factor(sample(c('group1', 'group2'), 50, T, c(0.3, 0.7)))
)
data_fake
data_fake$X
length(data_fake)
names(data_fake)
list.keepX <- list(layer1=c(5,10), layer2=c(5,10), layer3=c(10,20))
myResult.diablo <- block.splsda(data_fake$X, data_fake$Y, keepX=list.keepX)
myResult.diablo
plotIndiv(myResult.diablo)
for (i in 1:3) rownames(data_fake$X[i]) <- paste0('samp', 1:50)
data_fake$X[i]
rownames(data_fake$X[1])
# make fake data first
set.seed(390)
# make X matrices
X <- list(layer1=matrix(data=rnorm(50*100, mean=5, sd=3), nrow=50, dimnames=c(paste0('samp', 1:50),
paste0('l1f', 1:100))),
layer2=matrix(data=rnorm(50*75, mean=3, sd=1), nrow=50, dimnames=c(paste0('samp', 1:50),
paste0('l2f', 1:75))),
layer3=matrix(data=rnorm(50*150, mean=10, sd=7), nrow=50, dimnames=c(paste0('samp', 1:50),
paste0('l3f', 1:150))))
?matrix
# make fake data first
set.seed(390)
# make X matrices
X <- list(layer1=matrix(data=rnorm(50*100, mean=5, sd=3), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l1f', 1:100))),
layer2=matrix(data=rnorm(50*75, mean=3, sd=1), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l2f', 1:75))),
layer3=matrix(data=rnorm(50*150, mean=10, sd=7), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l3f', 1:150))))
X[[1]][1:5,1:5]
X[[2]][1:5,1:5]
X[[3]][1:5,1:5]
# make fake data first
set.seed(390)
# make X matrices
X <- list(layer1=matrix(data=rnorm(50*100, mean=5, sd=3), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l1_feat', 1:100))),
layer2=matrix(data=rnorm(50*75, mean=3, sd=1), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l2_feat', 1:75))),
layer3=matrix(data=rnorm(50*150, mean=10, sd=7), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l3_feat', 1:150))))
# make Y outcome variable
Y <- as.factor(sample(c('group1', 'group2'), 50, replace=T, prob=c(0.7, 0.3)))
myr <- block.splsda(X, Y, keepX=list(layer1=c(5,10), layer2=c(5,5), layer3=c(10,20)))
str(myr)
?block.splsda
data_fake <- list(X=X, Y=Y)
str(data_fake)
save(X, Y, file='~/Downloads/fake_data.RData')
rm(list=ls())
load('~/Downloads/fake_data.RData')
# make fake data first
set.seed(390)
# make X matrices
X <- list(layer1=matrix(data=rnorm(50*100, mean=5, sd=3), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l1_feat', 1:100))),
layer2=matrix(data=rnorm(50*75, mean=3, sd=1), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l2_feat', 1:75))),
layer3=matrix(data=rnorm(50*150, mean=10, sd=7), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l3_feat', 1:150))))
# make Y outcome variable
Y <- as.factor(sample(c('group1', 'group2'), 50, replace=T, prob=c(0.7, 0.3)))
# put in a list
fake_data <- list(X=X, Y=Y)
# save
save(fake_data, file='~/Downloads/fake_data.RData')
# make fake data first
set.seed(390)
# make X matrices
X <- list(layer1=matrix(data=rnorm(50*100, mean=5, sd=3), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l1_feat', 1:100))),
layer2=matrix(data=rnorm(50*75, mean=3, sd=1), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l2_feat', 1:75))),
layer3=matrix(data=rnorm(50*150, mean=10, sd=7), nrow=50, dimnames=list(paste0('samp', 1:50),
paste0('l3_feat', 1:150))))
# make Y outcome variable
Y <- as.factor(sample(c('group1', 'group2'), 50, replace=T, prob=c(0.7, 0.3)))
# put in a list
fake_data <- list(X=X, Y=Y)
# save
save(fake_data, file='~/Downloads/fake_data.RData')
# TCGA data second
library(mixOmics)
data("breast.TCGA")
# make X matrices
X <- list(mRNA = breast.TCGA$data.train$mrna,
miRNA = breast.TCGA$data.train$mirna,
protein = breast.TCGA$data.train$protein)
Y <- breast.TCGA$data.train$subtype
# put in a list
tcga_data <- list(X=X, Y=Y)
# save
save(tcga_data, file='~/Downloads/tcga_data.RData')
str(fake_data)
out <- block.splsda(X=fake_data$X, Y=fake_data$Y, keepX=list(layer1=c(10, 20), layer2=c(5,10), layer3=c(20,40)))
str(out)
plotDiablo(out)
plotDiablo(out, ncomp=1)
circosPlot(out, cutoff=0.7)
cimDiablo(out, color.blocks=c('darkorchid', 'brown1', 'lightgreen'), comp=1, margin=c(8,20))
dev.off()
cimDiablo(out, color.blocks=c('darkorchid', 'brown1', 'lightgreen'), comp=1)
myPerf <- perf(out, validation='Mfold', folds=5, nrepeat=10, dist='centroids.dist')
myPerf
myPerf$MajorityVote.error.rate
myPerf$MajorityVote
myauc <- auroc(out, roc.block='layer1', roc.comp=2)
myauc <- auroc(out, roc.block='layer2', roc.comp=2)
myauc <- auroc(out, roc.block='layer3', roc.comp=2)
out$design
?auroc
auroc
a1 <- c(2,4,8)
a2 <- c(2,8,9)
sm <- 19
for (i in a1) {
for (j in a2) {
if (i != j) {
cat(paste(i, j, (sm - i - j), '\n'))
}
}
}
a1 <- c(5,6,7)
a2 <- c(5,6,7)
sm <- 17
for (i in a1) {
for (j in a2) {
if (i != j) {
cat(paste(i, j, (sm - i - j), '\n'))
}
}
}
a1 <- c(5,6,7)
a2 <- c(5,6)
sm <- 17
for (i in a1) {
for (j in a2) {
if (i != j) {
cat(paste(i, j, (sm - i - j), '\n'))
}
}
}
sm
sum
summy <- 0
for (i in 1:3) summy <- summy + choose(3, i)
summy
summy <- 0
for (i in 1:5) summy <- summy + choose(5, i)
summy
?MCVfold.block.splsda
68 / 113
library(mixOmics)
data(breast.TCGA)
# extract training data and name each data frame
X <- list(mRNA = breast.TCGA$data.train$mrna,
miRNA = breast.TCGA$data.train$mirna,
protein = breast.TCGA$data.train$protein)
Y <- breast.TCGA$data.train$subtype
# design matrix
dsgn <- matrix(1, nrow=3, ncol=3, dimnames=list(names(X), names(X)))
diag(dsgn) <- 0
# without parallel
start1 <- Sys.time()
res1   <- tune.block.splsda(X, Y, ncomp=2,
test.keepX=list(mRNA=c(5:9, 10, 15, 20, 25),
miRNA=c(5:9, 10, 15, 20, 25),
protein=c(5:9, 10, 15, 20, 25)), design=dsgn)
stop1  <- Sys.time()
# with parallel
start2 <- Sys.time()
res2   <- tune.block.splsda(X, Y, ncomp=2, BPPARAM=BiocParallel::MulticoreParam(),
test.keepX=list(mRNA=c(5, 10, 15),
miRNA=c(5, 10, 15),
protein=c(5, 10, 15)), design=dsgn)
stop2  <- Sys.time()
# without parallel
start1 <- Sys.time()
res1   <- tune.block.splsda(X, Y, ncomp=2,
test.keepX=list(mRNA=c(5, 10, 15),
miRNA=c(5, 10, 15),
protein=c(5, 10, 15)), design=dsgn)
stop1  <- Sys.time()
# with parallel
start2 <- Sys.time()
res2   <- tune.block.splsda(X, Y, ncomp=2, BPPARAM=BiocParallel::MulticoreParam(),
test.keepX=list(mRNA=c(5, 10, 15),
miRNA=c(5, 10, 15),
protein=c(5, 10, 15)), design=dsgn)
stop2  <- Sys.time()
stop1-start1
stop2-start2
start1 <- Sys.time()
res1   <- tune.block.splsda(X, Y, ncomp=2,
test.keepX=list(mRNA=c(5, 10, 15),
miRNA=c(5, 10, 15),
protein=c(5, 10, 15)), design=dsgn)
stop1  <- Sys.time()
# with parallel
start2 <- Sys.time()
res2   <- tune.block.splsda(X, Y, ncomp=2, BPPARAM=BiocParallel::MulticoreParam(workers=2),
test.keepX=list(mRNA=c(5, 10, 15),
miRNA=c(5, 10, 15),
protein=c(5, 10, 15)), design=dsgn)
stop2  <- Sys.time()
stop1-start1
stop2-start2
?BiocParallel::bplapply
start1 <- Sys.time()
res1   <- tune.block.splsda(X, Y, ncomp=2, nrepeat=5,
test.keepX=list(mRNA=c(5, 10, 15),
miRNA=c(5, 10, 15),
protein=c(5, 10, 15)), design=dsgn)
stop1  <- Sys.time()
# with parallel
start2 <- Sys.time()
res2   <- tune.block.splsda(X, Y, ncomp=2, nrepeat=5, BPPARAM=BiocParallel::MulticoreParam(workers=5),
test.keepX=list(mRNA=c(5, 10, 15),
miRNA=c(5, 10, 15),
protein=c(5, 10, 15)), design=dsgn)
stop2  <- Sys.time()
stop1-start1
stop2-start2
res3 <- tune.block.splsda(X, Y, ncomp=2, nrepeat=1,
test.keepX=list(mRNA=c(5, 10, 15),
miRNA=c(5, 10, 15),
protein=c(5, 10, 15)), design=dsgn)
summary(res1)
res1$choice.keepX
?tune.block.splsda
res1$choice.ncomp
stop1-start1
stop2-start2
?cor
?cor.test
x <- rnorm(100)
y <- rnorm(100)
ct <- cor.test(x, y, alternative='positive')
ct <- cor.test(x, y, alternative='greater')
ct
library(SNFtool)
?affinityMatrix
?SNF
?gls
??gls
install.packages('BoostMLR')
?switch
invisible(pi)
switch(1, invisible(pi), pi)
switch(2, invisible(pi), pi)
# load libraries
library(SingleCellExperiment) # to handle daFrame
library(diffcyt)              # for getting proper data for GLM
setwd('~/Documents/sepsis/manuscript/analyses/bin')
# load daFrame
load("../results/daFrame.RData")
# load metadata file
md <- read.csv("../data/metadata.csv")
# set necessary variables
clustering_to_use <- "merged_clusters"
code_id           <- colData(daf)$cluster_id
cluster_id        <- metadata(daf)$cluster_codes[, clustering_to_use][code_id]
clustering_name   <- clustering_to_use
# add columns to colData
colData(daf)$code_id    <- code_id
colData(daf)$cluster_id <- cluster_id
# make experiment_info data frame
experiment_info <- merge(md,
metadata(daf)$experiment_info[, c("sample_id",
"n_cells")],
by = "sample_id")
metadata        <- metadata(daf)
# split cells by combined sample
cs_by_s <- split(seq_len(ncol(daf)), colData(daf)$sample_id)
# re-order according to experiment_info
cs <- unlist(cs_by_s[as.character(experiment_info$sample_id)])
es <- t(assays(daf)[["exprs"]])[cs, , drop = FALSE]
# create SummarizedExperiment
d_se <- SummarizedExperiment(
assays   = list(exprs = es),
rowData  = colData(daf)[cs, ],
colData  = rowData(daf),
metadata = metadata
)
# calculate cell counts per sample
d_counts <- calcCounts(d_se)
# create proportion matrix
prop_mat <- d_counts@assays@data$counts / 4000 # we downsampled to 4000 cells
# set up result data frame
rslt <- data.frame(cluster = seq_len(nrow(prop_mat)),
t.value = rep(0, nrow(prop_mat)),
p.value = rep(0, nrow(prop_mat)))
# loop
for (i in seq_len(nrow(prop_mat))) {
# GLM
mod <- glm(prop_mat[i, ] ~ md$condition, family = "quasibinomial")
# add to result data frame
rslt[i, ] <- c(i, summary(mod)$coef[2, 3], summary(mod)$coef[2, 4])
}
# multiple testing correction
rslt$fdr <- p.adjust(rslt$p.value, method = "fdr")
# order by p-value
rslt <- rslt[order(rslt$p.value), ]
# write to table
write.table(x = rslt,
file = "../results/glm_results.csv",
quote = F,
sep = ",",
row.names = F)
lintr::lint_dir('..')
lintr::lint_dir('..')
lintr::lint_dir('..')
length(fs_raw)
length(md)
seq_len(length(md))
lintr::lint_dir('..')
lintr::lint('run_flow_cyto_pipeline.R')
?cluster
cluster
library(CATALYST)
cluster
lintr::lint('run_flow_cyto_pipeline.R')
?mergeClusters
lintr::lint('run_flow_cyto_pipeline.R')
lintr::lint('run_flow_cyto_pipeline.R')
